#!/usr/bin/env bash
# Launches train/train_unified.py on GPUs 0-3 through `accelerate launch`.
#
# Usage: run with bash from the repository root. The training script path,
# the output directory and the PYTHONPATH entry below are all relative to
# the current working directory.
set -euo pipefail

# NOTE(review): presumably restricts TorchInductor compilation to a single
# worker thread -- confirm against the PyTorch version in use.
export TORCHINDUCTOR_COMPILE_THREADS=1
# Weights & Biases offline mode (pairs with `--report_to 'wandb'` below).
export WANDB_MODE=offline

# Options consumed by `accelerate launch` itself.
launch_args=(
  --multi_gpu
  --gpu_ids '0,1,2,3'
  --main_process_port 25000
  --num_processes 4
)

# Options forwarded to train/train_unified.py.
train_args=(
  --output_dir "output/load512_512_gray_image_notext_0.1_seg_4A6000"

  # Optimisation settings.
  --train_batch_size 16
  --gradient_accumulation_steps 8
  --learning_rate 1e-4
  --text_loss_weight 0.1
  --max_grad_norm 10
  --mixed_precision fp16
  --lr_scheduler constant
  --use_8bit_adam

  # Pretrained weights: machine-specific Hugging Face cache snapshots.
  --pretrained_model_name_or_path "/home/101/u101004/.cache/huggingface/hub/models--MeissonFlow--Meissonic/snapshots/7ee5068bcebaaf6165e5d77c2969aa3b38e17b38/"
  --pretrained_transformer_path "/home/101/u101004/.cache/huggingface/hub/models--MeissonFlow--Muddit/snapshots/ce16782f0e2bc3e5cd03ea6a14df1f94587b0f93/"
  --text_encoder_architecture 'open_clip'

  # Dataset (machine-specific data directory).
  --instance_dataset 'COCODataset'
  --instance_data_dir '/home/101/u101004/user_gbh/data/'
  --image_key 'image'
  --prompt_key 'text'
  --resolution 512
  --dataloader_num_workers 0

  # Schedule, checkpointing, validation and logging.
  --resume_from_checkpoint "latest"
  --max_train_steps 100000
  --checkpointing_steps 200
  --validation_steps 100
  --validation_prompts 'a boy'
  --report_to 'wandb'
  --logging_steps 10
)

# Python reads PYTHONPATH (no underscore) for extra module search paths; the
# previous spelling PYTHON_PATH was silently ignored by the interpreter.
# './' is prepended and any PYTHONPATH inherited from the caller is kept.
PYTHONPATH="./${PYTHONPATH:+:${PYTHONPATH}}" \
  accelerate launch "${launch_args[@]}" train/train_unified.py "${train_args[@]}"